options(timeout=180)
library(ggspavis)
library(openxlsx)
library(dplyr)
library(scran)
library(scater)
library(SpatialExperiment)
library(TENxVisiumData)
library(SpatialExperiment)
library(SpatialFeatureExperiment)
library(Voyager)
library(patchwork)
library(nnSVG)
library(here)
spe <- MouseBrainSagittalAnterior()
spe <- spe[, colData(spe)$sample_id == "MouseBrainSagittalAnterior1"]
Subset to keep only spots over tissue
spe <- spe[, int_colData(spe)$spatialData$in_tissue == TRUE]
Identify mitochondrial genes
is_mito <- grepl("(^MT-)|(^mt-)", rowData(spe)$symbol)
spe <- addPerCellQC(spe, subsets = list(mito = is_mito))
Select QC thresholds sum: Library size (represents the total sum of UMI counts per spot). detected: The number of expressed features (refers to the number of genes with non-zero UMI counts per spot). subsets_mito_percent: A high proportion of mitochondrial reads indicates cell damage.
To select the QC thresholds we first plot them and choose the thresholds visually.
hist(colData(spe)$sum, breaks = 20, col = "skyblue", border = "white",
xlab = "UMI Counts per Spot", ylab = "Frequency",
main = "Distribution of UMI Counts per Spot")
grid(col = "gray", lwd = 0.5)
hist(colData(spe)$detected, breaks = 20, col = "skyblue", border = "white",
xlab = "Number of genes with non-zero UMI counts per spot", ylab = "Frequency",
main = "Distribution of number of genes with non-zero UMI counts per spot")
grid(col = "gray", lwd = 0.5)
hist(colData(spe)$subsets_mito_percent, breaks = 20, col = "skyblue", border = "white",
xlab = "Percentage of mitochondrial reads", ylab = "Frequency",
main = "Distribution of percentage of mitochondrial reads")
grid(col = "gray", lwd = 0.5)
qc_lib_size <- colData(spe)$sum < 2000
qc_detected <- colData(spe)$detected < 1000
qc_mito <- colData(spe)$subsets_mito_percent > 30
Number of discarded spots for each metric
apply(cbind(qc_lib_size, qc_detected, qc_mito), 2, sum)
## qc_lib_size qc_detected qc_mito
## 26 23 14
discard <- qc_lib_size | qc_detected | qc_mito
table(discard)
## discard
## FALSE TRUE
## 2655 40
colData(spe)$discard <- discard
Plot discarded spots
# check spatial pattern of combined set of discarded spots
plotVisium(spe, x_coord = "pxl_row_in_fullres", y_coord = "pxl_col_in_fullres",
fill = "discard")
Filter low-quality spots
spe <- spe[, !colData(spe)$discard]
spe <- computeLibraryFactors(spe)
spe <- logNormCounts(spe)
spe <- spe[!is_mito, ]
Now, let’s keep only highly variable genes.
# fit mean-variance relationship
dec <- modelGeneVar(spe, assay.type = "logcounts")
# select top HVGs
top_hvgs <- getTopHVGs(dec, prop = 0.9)
spe <- spe[rownames(spe) %in% top_hvgs,]
fit <- metadata(dec)
plot(fit$mean, fit$var,
xlab = "mean of log-expression", ylab = "variance of log-expression")
curve(fit$trend(x), col = "dodgerblue", add = TRUE, lwd = 2)
# compute PCA
set.seed(123)
spe <- runPCA(spe, subset_row = top_hvgs)
reducedDimNames(spe)
## [1] "PCA"
# graph-based clustering
set.seed(123)
k <- 28
g <- buildSNNGraph(spe, k = k, use.dimred = "PCA")
g_walk <- igraph::cluster_walktrap(g)
clus <- g_walk$membership
table(clus)
## clus
## 1 2 3 4 5 6 7 8 9
## 424 210 320 581 476 314 129 78 123
colLabels(spe) <- factor(clus)
saveRDS(spe, file="spe.rds")
Cellmarker data from http://bio-bigdata.hrbmu.edu.cn/CellMarker/CellMarker_download.html
cellmarkers <- read.xlsx('Cell_marker_Mouse.xlsx', rowNames =F)
cell_types <- cellmarkers %>% filter(tissue_class=='Brain' & cell_type=="Normal cell") %>% select(cell_name)
Extract Gene Set info from the xlsx file
cell_types <- cellmarkers %>% filter(tissue_class=='Brain' & cell_type=="Normal cell") %>% dplyr::count(cell_name) %>% arrange(desc(n)) %>% filter(n>1) %>% select(cell_name)
cell_types <- cell_types$cell_name
# filter genes symbol info on the xlsx file
cm_sets <- lapply(cell_types, function(x) cellmarkers %>%
filter(tissue_class=='Brain' &
cell_type=="Normal cell" &
cell_name==x &
!(is.na(Symbol))) %>% pull(Symbol) %>% unique())
cell_types <- unique(make.names(cell_types))
names(cm_sets) <- cell_types
set_names <- names(cm_sets)
Create list with geneset names from SpatialExperiment object
genesetlist <- lapply(set_names, function(x) {
geneset <- rownames(rowData(spe)[rowData(spe)$symbol %in% cm_sets[[x]], ,drop=FALSE])
})
names(genesetlist) <- set_names
genesetlist <- genesetlist[sapply(genesetlist, function(x) length(x) > 1)]
saveRDS(genesetlist, file="RDS_files/genesets.rds")
# Function for comparison between GSVA scores and clusters
source("gsva_cluster_tests.R")
# SpatialExperiment object with GSVA scores
res <- readRDS("RDS_files/gsva_res.rds")
gsva_sfe <- toSpatialFeatureExperiment(res)
colGraph(gsva_sfe, "visium", sample_id = "MouseBrainSagittalAnterior1") <- findVisiumGraph(gsva_sfe, sample_id = "MouseBrainSagittalAnterior1", zero.policy = TRUE)
assays(gsva_sfe)$logcounts <- assays(gsva_sfe)$es
moran_res <- runUnivariate(gsva_sfe, type = "moran", colGraphName = "visium")
rowData(moran_res)[order(rowData(moran_res)$moran_MouseBrainSagittalAnterior1,decreasing = TRUE),][1:10,2:3]
## DataFrame with 10 rows and 2 columns
## moran_MouseBrainSagittalAnterior1
## <numeric>
## Myelinating.stem.cell 0.843874
## Striatopallidal.loop.cell 0.815366
## Vascular.leptomeningeal.cell 0.814274
## Synaptic.cell 0.784577
## Myelinating.oligodendrocyte 0.771354
## Early.GABAergic.neuron 0.760723
## D1.Medium.spiny.neuron.D1.MSN. 0.756597
## Astrocyte 0.749458
## Satellite.glial.cell 0.748608
## Oligodendrocyte 0.721197
## K_MouseBrainSagittalAnterior1
## <numeric>
## Myelinating.stem.cell 1.56658
## Striatopallidal.loop.cell 4.16227
## Vascular.leptomeningeal.cell 4.82437
## Synaptic.cell 5.09456
## Myelinating.oligodendrocyte 4.28530
## Early.GABAergic.neuron 3.38674
## D1.Medium.spiny.neuron.D1.MSN. 6.13580
## Astrocyte 4.34787
## Satellite.glial.cell 4.28117
## Oligodendrocyte 10.92995
spe_nnSVG <- readRDS("RDS_files/nnsvg_res.rds")
rowData(spe_nnSVG)[order(rowData(spe_nnSVG)$rank,decreasing = TRUE),][1:10,2:15]
## DataFrame with 10 rows and 14 columns
## sigma.sq tau.sq phi loglik
## <numeric> <numeric> <numeric> <numeric>
## Disease.associated.microglial.cell 0.00477849 0.2838391 1.44804 -2114.817
## Neuron.glial.antigen.2.NG2..cell 0.00674967 0.1283583 13.16504 -1098.976
## Vascular.endothelial.cell 0.00933031 0.2186103 4.58197 -1792.096
## Hemogenic.endothelial.cell 0.02153829 0.1551948 37.53960 -1447.578
## Homeostatic.microglial.cell 0.02035685 0.2154609 19.84606 -1828.021
## Monocyte.derived.macrophage 0.00715999 0.0839015 11.17766 -559.162
## Plasmacytoid.dendritic.cell.pDC. 0.00613245 0.0923885 1.03317 -659.607
## Border.associated.macrophage 0.00537891 0.0614069 7.66768 -141.157
## Hippocampal.granule.precursor.cell 0.03256509 0.1749743 23.39356 -1628.272
## Venous.cell 0.02253369 0.1891998 6.89010 -1651.130
## runtime mean var spcov
## <numeric> <numeric> <numeric> <numeric>
## Disease.associated.microglial.cell 1.887 NA NA NA
## Neuron.glial.antigen.2.NG2..cell 1.411 NA NA NA
## Vascular.endothelial.cell 1.943 NA NA NA
## Hemogenic.endothelial.cell 2.881 NA NA NA
## Homeostatic.microglial.cell 1.881 NA NA NA
## Monocyte.derived.macrophage 1.781 NA NA NA
## Plasmacytoid.dendritic.cell.pDC. 0.426 NA NA NA
## Border.associated.macrophage 0.806 NA NA NA
## Hippocampal.granule.precursor.cell 3.129 NA NA NA
## Venous.cell 1.820 NA NA NA
## prop_sv loglik_lm LR_stat rank
## <numeric> <numeric> <numeric> <numeric>
## Disease.associated.microglial.cell 0.0165565 -2117.623 5.6125 144
## Neuron.glial.antigen.2.NG2..cell 0.0499576 -1109.867 21.7828 143
## Vascular.endothelial.cell 0.0409331 -1804.122 24.0525 142
## Hemogenic.endothelial.cell 0.1218690 -1465.925 36.6936 141
## Homeostatic.microglial.cell 0.0863245 -1848.488 40.9340 140
## Monocyte.derived.macrophage 0.0786280 -584.042 49.7586 139
## Plasmacytoid.dendritic.cell.pDC. 0.0622451 -686.198 53.1835 138
## Border.associated.macrophage 0.0805398 -169.203 56.0925 137
## Hippocampal.granule.precursor.cell 0.1569104 -1673.804 91.0644 136
## Venous.cell 0.1064248 -1698.438 94.6170 135
## pval padj
## <numeric> <numeric>
## Disease.associated.microglial.cell 6.04313e-02 6.04313e-02
## Neuron.glial.antigen.2.NG2..cell 1.86179e-05 1.87481e-05
## Vascular.endothelial.cell 5.98517e-06 6.06946e-06
## Hemogenic.endothelial.cell 1.07668e-08 1.09959e-08
## Homeostatic.microglial.cell 1.29211e-09 1.32903e-09
## Monocyte.derived.macrophage 1.56692e-11 1.62329e-11
## Plasmacytoid.dendritic.cell.pDC. 2.82718e-12 2.95010e-12
## Border.associated.macrophage 6.60139e-13 6.93868e-13
## Hippocampal.granule.precursor.cell 0.00000e+00 0.00000e+00
## Venous.cell 0.00000e+00 0.00000e+00
top_nnSVG_genesets <- rownames(rowData(spe_nnSVG)[order(rowData(spe_nnSVG)$rank,decreasing = FALSE),][1:10,])
plot_list_hist_gsva_nnsvg <- lapply(top_nnSVG_genesets, function(x){
pl <- list(x,plotVisium(res,y_coord = "pxl_col_in_fullres", x_coord = "pxl_row_in_fullres", fill = x, assay = "es",sample_ids=NULL) + ggtitle(paste0(x," GSVA scores superposed in histological image" ))+ theme(text = element_text(size = 7)))
pl
})
for (i in 1:length(plot_list_hist_gsva_nnsvg)){
print(plot_list_hist_gsva_nnsvg[i][[1]][[2]]+cluster_plot)
print(gsva_clusters_ttests(spe,res,plot_list_hist_gsva_nnsvg[i][[1]][[1]]))
}
## $clusters
## [1] 1 5 8
##
## $t_statistics
## t t t
## -28.40401 -22.45167 -16.48003
## $clusters
## [1] 4 6
##
## $t_statistics
## t t
## -18.34119 -51.44519
## $clusters
## [1] 4
##
## $t_statistics
## statistic.t
## -56.07121
## $clusters
## [1] 4
##
## $t_statistics
## t t
## -39.438710 -2.341196
## $clusters
## [1] 1 3 5 8
##
## $t_statistics
## t t t t
## -29.450621 -3.268205 -27.663425 -8.965769
## $clusters
## [1] 1 5 7 8 9
##
## $t_statistics
## t t t t t
## -16.12438 -10.42099 -14.85694 -17.91070 -10.12684
## $clusters
## [1] 4 6
##
## $t_statistics
## t t
## -10.60505 -39.86971
## $clusters
## [1] 4 9
##
## $t_statistics
## t t
## -58.276244 -2.583948
## $clusters
## [1] 1 5 6 8
##
## $t_statistics
## t t t t
## -6.938438 -17.410931 -9.773505 -3.636328
## $clusters
## [1] 4
##
## $t_statistics
## statistic.t
## -54.09251
most_corr<-most_cor_geneset(spe,res)
corr_plots <- lapply(1:length(unique(colData(spe)$label)), function(x){
pl <- plotVisium(res,y_coord = "pxl_col_in_fullres", x_coord = "pxl_row_in_fullres", fill = most_corr[x], assay = "es",sample_ids=NULL) + labs(subtitle=NULL)+ggtitle(paste0(most_corr[x]," GSVA scores (most correlated with cluster ",x,")" ))+ theme(text = element_text(size = 7))
})
for (i in 1:length(corr_plots)){
print(corr_plots[[i]]+cluster_plot)
}